TO DO LIST
- reduce the number of genres -> produce a list
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ stringr 1.5.0
✔ lubridate 1.9.2 ✔ tibble 3.2.1
✔ purrr 1.0.1 ✔ tidyr 1.3.0
✔ readr 2.1.4
── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(dplyr)
Explore columns
# Print, for every column of `tracks`, its position and name followed by a
# preview of up to the first 10 values.
# seq_len() yields an empty sequence for a zero-column table (1:0 would
# iterate over 1 and 0), and head() avoids NA padding when the table has
# fewer than 10 rows.
for (i in seq_len(ncol(tracks))) {
  print(paste0(i, "----", colnames(tracks)[i]))
  print(head(tracks[, i], 10))
}
[1] "1----artists"
[1] "Nayt" "Mahmood"
[3] "NASKA" "NASKA"
[5] "Bresh" "Marracash"
[7] "Rino Gaetano" "NASKA"
[9] "Pinguini Tattici Nucleari" "Marracash"
[1] "2----album_name"
[1] "Un bacio (Deluxe Edition)" "Brividi"
[3] "REBEL" "REBEL"
[5] "ORO BLU" "NOI, LORO, GLI ALTRI"
[7] "Q Concert" "REBEL (Deluxe)"
[9] "Giovani Wannabe" "NOI, LORO, GLI ALTRI"
[1] "3----year"
[1] 2016 2022 2022 2022 2022 2021 1981 2022 2022 2021
[1] "4----season"
[1] "Spring" "Winter" "Spring" "Spring" "Spring" "Autumn" "missing"
[8] "Autumn" "Spring" "Autumn"
[1] "5----popularity"
[1] 32 65 53 46 59 64 46 55 63 66
[1] "6----acousticness"
[1] 0.50300 0.44800 0.02820 0.00162 0.36900 0.48500 0.05690 0.01360
[9] 0.04090 0.57000
[1] "7----danceability"
[1] 0.792 0.523 0.455 0.594 0.489 0.614 0.566 0.405 0.739 0.727
[1] "8----energy"
[1] 0.631 0.614 0.677 0.920 0.367 0.464 0.376 0.616 0.810 0.698
[1] "9----instrumentalness"
[1] 0 0 0 0 0 0 0 0 0 0
[1] "10----liveness"
[1] 0.1420 0.2540 0.5640 0.1590 0.1980 0.0812 0.4740 0.1090 0.1220 0.1100
[1] "11----loudness"
[1] -11.908 -4.435 -5.041 -5.566 -10.254 -8.473 -11.643 -6.442
[9] -5.317 -9.676
[1] "12----speechiness"
[1] 0.0992 0.0347 0.0338 0.0519 0.0878 0.0803 0.0443 0.0347 0.0311 0.3650
[1] "13----tempo"
[1] 108.076 122.962 179.981 139.929 131.302 152.092 176.366 160.219
[9] 127.972 92.915
[1] "14----key"
[1] 10 7 4 2 0 9 0 1 11 8
[1] "15----mode"
[1] 1 1 1 1 1 1 1 1 0 1
[1] "16----duration_ms"
[1] 141111 199146 219846 170388 231602 195799 217629 179815 212966 234446
[1] "17----valence"
[1] 0.805 0.342 0.266 0.492 0.524 0.293 0.486 0.199 0.948 0.356
[1] "18----time_signature"
[1] 4 4 4 4 4 4 4 3 4 4
[1] "19----track.id"
[1] "3o5AiG9Omh5GWlBNKEVcA9" "1ZMGp9MTXbtAPvcKa0U3zS"
[3] "6fPGBlx8wsAxhoDn7BwiAH" "75U0n5xhZT3al2oC3I61rG"
[5] "6I28wnb48iMVVfyTSf4lkx" "0WgVvy1KelQxG6KBUukTWI"
[7] "5mA4wMDUbf9A2N0vzCR80R" "0jkBVkeS6L5NtvPn29NeIK"
[9] "7iLuBTHJSXM2HalKHFqEEy" "4gxRyOZefp95AXZFaztdtO"
[1] "20----genre_1"
[1] "italian hip hop" "italian adult pop" "emo rap italiano"
[4] "emo rap italiano" "italian hip hop" "italian hip hop"
[7] "canzone d'autore" "emo rap italiano" "bergamo indie"
[10] "italian hip hop"
[1] "21----genre_2"
[1] "italian" "italian" "italian" "italian" "italian" "italian" NA
[8] "italian" NA "italian"
Ranges
# Show the minimum and maximum of the columns at positions 5 to 18.
# Each printed vector holds the position, the column name and the two range
# values; c() coerces all of them to character.
for (col_idx in 5:18) {
  col_label <- colnames(tracks)[col_idx]
  col_limits <- range(tracks[, col_idx])
  print(c(col_idx, col_label, col_limits))
}
[1] "5" "popularity" "19" "95"
[1] "6" "acousticness" "0.000519" "0.921"
[1] "7" "danceability" "0.352" "0.877"
[1] "8" "energy" "0.228" "0.968"
[1] "9" "instrumentalness" "0"
[4] "0.014"
[1] "10" "liveness" "0.0344" "0.667"
[1] "11" "loudness" "-14.291" "-2.363"
[1] "12" "speechiness" "0.0261" "0.365"
[1] "13" "tempo" "74.836" "197.773"
[1] "14" "key" "0" "11"
[1] "15" "mode" "0" "1"
[1] "16" "duration_ms" "137562" "326893"
[1] "17" "valence" "0.0397" "0.948"
[1] "18" "time_signature" "3" "5"
# Positions of the selected feature columns: 5 to 15, then 17 and 18
# (position 16 is skipped).
numerical_values <- c(5:15, 17, 18)
# Number of selected columns.
length(numerical_values)
[1] 13
Distributions
# Histogram of every column of `tracks` at positions 5 to 18, one plot per
# column with a fixed 30 bins.
numerical_values <- 5:18
for (col in colnames(tracks)[numerical_values]) {
  # `.data[[col]]` looks the column up by the name held in `col`.
  histogram_plot <- ggplot(data = tracks, mapping = aes(x = .data[[col]])) +
    geom_histogram(bins = 30, fill = "blue", color = "black", alpha = 0.7) +
    labs(title = col, x = "", y = "Frequency") +
    theme_bw()

  # ggplot objects are not auto-printed inside a for loop.
  print(histogram_plot)
}














# Tag each data frame in `dataset_list` with the name of its list element
# (`id`) and with the "In.che.stato.vivi." survey answer (`state`).
# List element i is paired with survey row i.
df_list_with_names <- lapply(seq_along(dataset_list), function(idx) {
  tagged <- dataset_list[[idx]]
  tagged$id <- names(dataset_list)[idx]
  # Further survey columns can be attached here in the same way, e.g.
  # tagged$study <- dataset_survey[idx, "Che.cosa.studi."]
  tagged$state <- dataset_survey[idx, "In.che.stato.vivi."]
  tagged
})

# Stack the tagged data frames into one table.
df_all_in_one <- bind_rows(df_list_with_names)

# Preview every 50th row among the first 1000.
preview_rows <- seq(1, 1000, 50)
print(df_all_in_one[preview_rows, ])

# Drop the first column, then tabulate the secondary genre.
df_all_in_one <- df_all_in_one[, -1]
table(df_all_in_one$genre_2)
alternative brighton classical hip house indie italian
45 1 57 142 23 88 2149
italiana italiano missing pop rock
53 98 15 813 173
# Scatter plot of every pair of selected feature columns of `df_all_in_one`,
# coloured by `state`. Positions 9 and 10 are left out of the pairing.
# A logical mask replaces `-which(...)`: when nothing matches,
# `x[-integer(0)]` selects nothing and would silently drop every column.
numerical_values_new <- numerical_values[!numerical_values %in% c(9, 10)]
columns <- colnames(df_all_in_one)[numerical_values_new]
# columns <- colnames(tracks)[7:10]

# combn() enumerates the pairs in the same order as nested i < j loops; the
# length guard skips plotting when fewer than two columns are available.
if (length(columns) >= 2) {
  for (column_pair in combn(columns, 2, simplify = FALSE)) {
    x_col <- column_pair[1]
    y_col <- column_pair[2]
    # `.data[[name]]` maps columns by name from the plot data instead of
    # injecting whole vectors from the global data frame into aes().
    scatter_plot <- ggplot(
      df_all_in_one,
      aes(.data[[x_col]], .data[[y_col]], colour = state)
    ) +
      geom_point() +
      theme_bw() +
      theme(legend.position = "none") +
      labs(x = x_col, y = y_col)
    print(scatter_plot)
  }
}


































































Blueprint for plots

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFRPIERPIExJU1QNCi0gcmlkdWNlIHRoZSBudW1iZXIgb2YgZ2VucmVzLT4gcHJvZHVjZSBhIGxpc3QNCg0KYGBge3J9DQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQpgYGB7cn0NCmRhdGFzZXRfbGlzdCA9IHJlYWRSRFMoIi4uLy4uL2RhdGEvZGF0YV9nZW5yZXNfY2xlYW5lZC5SRGF0YSIpDQp0cmFja3MgPSBkYXRhc2V0X2xpc3RbWzFdXQ0KZGltKHRyYWNrcykNCmhlYWQodHJhY2tzKQ0KdHJhY2tzID0gdHJhY2tzWywtMV0NCmBgYA0KDQoNCiMgRXhwbG9yZSBjb2x1bW5zDQpgYGB7cn0NCmZvciAoaSBpbiAxOmRpbSh0cmFja3MpWzJdKXsNCiAgcHJpbnQocGFzdGUwKGksIi0tLS0iLGNvbG5hbWVzKHRyYWNrcylbaV0pKQ0KICBwcmludCh0cmFja3NbMToxMCxpXSkNCn0NCmBgYA0KIyBSYW5nZXMNCmBgYHtyfQ0KbnVtZXJpY2FsX3ZhbHVlcyA9IDU6MTgNCmZvciAoaSBpbiBudW1lcmljYWxfdmFsdWVzKXsNCiAgcHJpbnQoYyhpLGNvbG5hbWVzKHRyYWNrcylbaV0scmFuZ2UodHJhY2tzWyxpXSkpKQ0KfQ0KbGVuZ3RoKG51bWVyaWNhbF92YWx1ZXMpDQpgYGANCg0KDQojIGRpc3RyaWJ1dGlvbnMNCmBgYHtyfQ0KDQpmb3IoY29sIGluIGNvbG5hbWVzKHRyYWNrcylbbnVtZXJpY2FsX3ZhbHVlc10pew0KICBiaW5zID0gcmFuZ2UodHJhY2tzW1tjb2xdXSlbMl0tcmFuZ2UodHJhY2tzW1tjb2xdXSlbMV0NCiAgcGxvdCA8LSBnZ3Bsb3QoZGF0YSA9IHRyYWNrcywgbWFwcGluZyA9IGFlcyh4ID0uZGF0YVtbY29sXV0pKSArDQogIGdlb21faGlzdG9ncmFtKGJpbnMgPTMwLCBmaWxsID0gImJsdWUiLCBjb2xvciA9ICJibGFjayIsIGFscGhhID0gMC43KSArDQogIGxhYnModGl0bGUgPSBjb2wsIHggPSAiIiwgeSA9ICJGcmVxdWVuY3kiKSArDQogIHRoZW1lX2J3KCkNCiAgDQogIHByaW50KHBsb3QpDQp9DQoNCmBgYA0KYGBge3J9DQpkYXRhc2V0X3N1cnZleSA9IHJlYWQuY3N2KCIuLi8uLi9kYXRhL1F1ZXN0aW9uYXJpbyBTcG90aWZ5IChSaXNwb3N0ZSkgLSBSaXNwb3N0ZSBkZWwgbW9kdWxvIDEuY3N2IikNCmRhdGFzZXRfc3VydmV5ID0gcmVhZC5jc3YoIi4uLy4uL2RhdGEvU3BvdGlmeSBTdXJ2ZXkgIChSaXNwb3N0ZSkgLSBSaXNwb3N0ZSBkZWwgbW9kdWxvIDEuY3N2IikNCmBgYA0KYGBge3J9DQoNCm5hbWVfaWQgPSBzdWJzdHIoc3ViKCIuKi8oLispJCIsICJcXDEiLCBkYXRhc2V0X3N1cnZleVssIlBhc3RlLmhlcmUudGhlLmxpbmsiXSksIHN0YXJ0ID0gMSwgc3RvcCA9IDE1KQ0KZGF0YXNldF9zdXJ2ZXkkbmFtZS5pZCA9IG5hbWVfaWQNCmRhdGFzZXRfc3VydmV5X25ldyA9IGRhdGFzZXRfc3VydmV5Wyw0OjIyXQ0KY29sbmFtZXMoZGF0YXNldF9zdXJ2ZXkpDQpzYXZlUkRTKGRhdGFzZXRfc3VydmV5LCIuLi8uLi9kYXRhL1N1
cnZleSIpDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBBZGQgYSBjb2x1bW4gaW5kaWNhdGluZyB0aGUgc291cmNlIGRhdGEgZnJhbWUNCmRmX2xpc3Rfd2l0aF9uYW1lcyA8LSBsYXBwbHkoc2VxX2Fsb25nKGRhdGFzZXRfbGlzdCksIGZ1bmN0aW9uKGkpIHsNCiAgZGF0YXNldF9saXN0W1tpXV0kaWQgPC0gbmFtZXMoZGF0YXNldF9saXN0KVtpXQ0KICAjIGhlcmUgeW91IGNhbiBhZGQgYWxsIGtpbmQgb2YgY29sdW1ucyBmcm9tIHRoZSBzdXJ2ZXkgZGF0YXNldA0KICAjZGF0YXNldF9saXN0W1tpXV0kc3R1ZHkgPC0gZGF0YXNldF9zdXJ2ZXlbaSwiQ2hlLmNvc2Euc3R1ZGkuIl0NCiAgZGF0YXNldF9saXN0W1tpXV0kc3RhdGUgPC0gZGF0YXNldF9zdXJ2ZXlbaSwiSW4uY2hlLnN0YXRvLnZpdmkuIl0NCiAgcmV0dXJuKGRhdGFzZXRfbGlzdFtbaV1dKQ0KfSkNCg0KIyBDb21iaW5lIHRoZSBkYXRhIGZyYW1lcyBpbnRvIGEgc2luZ2xlIGRhdGEgZnJhbWUNCmRmX2FsbF9pbl9vbmUgPC0gYmluZF9yb3dzKGRmX2xpc3Rfd2l0aF9uYW1lcykNCg0KIyBQcmludCB0aGUgcmVzdWx0DQpwcmludChkZl9hbGxfaW5fb25lW3NlcSgxLDEwMDAsNTApLF0pDQpkZl9hbGxfaW5fb25lPWRmX2FsbF9pbl9vbmVbLC0xXQ0KYGBgDQpgYGB7cn0NCnRhYmxlKGRmX2FsbF9pbl9vbmUkZ2VucmVfMikNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCm51bWVyaWNhbF92YWx1ZXNfbmV3ID0gbnVtZXJpY2FsX3ZhbHVlc1std2hpY2gobnVtZXJpY2FsX3ZhbHVlcyVpbiVjKDksMTApKV0NCmNvbHVtbnMgPSBjb2xuYW1lcyhkZl9hbGxfaW5fb25lKVtudW1lcmljYWxfdmFsdWVzX25ld10NCiNjb2x1bW5zID1jb2xuYW1lcyh0cmFja3MpWzc6MTBdDQpmb3IoaSBpbiAxOihsZW5ndGgoY29sdW1ucyktMSkpew0KICBmb3IoaiBpbiAoaSsxKTpsZW5ndGgoY29sdW1ucykpew0KICAgIHBsb3QgPSBnZ3Bsb3QoZGZfYWxsX2luX29uZSwgYWVzKGRmX2FsbF9pbl9vbmVbLGNvbHVtbnNbaV1dLGRmX2FsbF9pbl9vbmVbLGNvbHVtbnNbal1dICwgY29sb3VyID0gc3RhdGUgKSkgKyANCiAgICAgICAgZ2VvbV9wb2ludCgpKw0KICAgICAgICB0aGVtZV9idygpKw0KICAgICAgICB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpKw0KICAgICAgICBsYWJzKHggPWNvbHVtbnNbaV0seSA9IGNvbHVtbnNbal0gKQ0KICAgIHByaW50KHBsb3QpDQogIH0NCn0NCmBgYA0KDQoNCiMgQmx1ZXByaW50IGZvciBwbG90cyANCmBgYHtyfQ0KcGxvdCA9IGdncGxvdCh0cmFja3MsIGFlcyhlbmVyZ3ksbG91ZG5lc3MgLCBjb2xvdXIgPSBnZW5yZV8xICkpICsgDQogIGdlb21fcG9pbnQoKSsNCiAgdGhlbWVfYncoKSsNCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKQ0KDQpwcmludChwbG90KQ0KYGBgDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==